import json
import time
import scrapy

from ..items import Spider04Item


class Getimg04Spider(scrapy.Spider):
    """Collect poster image URLs and titles from Douban's "now playing" page.

    The spider requests the Shenzhen "now playing" listing and yields a single
    ``Spider04Item`` whose ``url`` and ``name`` fields are index-aligned lists
    (entry *i* of each list belongs to the same movie).
    """

    name = "getimg04"
    # Restrict this spider to a specific item pipeline.
    custom_settings = {
        'ITEM_PIPELINES': {'spider04.pipelines.MyImagesPipeline': 200},
    }
    allowed_domains = ["movie.douban.com"]
    start_urls = ["https://movie.douban.com/cinema/nowplaying/shenzhen"]

    def parse(self, response):
        """Extract every movie's poster URL and title from the listing page.

        Args:
            response: The downloaded "now playing" page.

        Yields:
            One ``Spider04Item`` with ``url`` (list of poster image URLs) and
            ``name`` (list of movie titles), kept in the same order.
        """
        movie_nodes = response.xpath('//*[@id="nowplaying"]/div[2]/ul/li/ul')
        item = Spider04Item()
        item['url'] = []
        item['name'] = []

        for node in movie_nodes:
            img_url = node.xpath('.//li/a/img/@src').extract_first()
            title = node.xpath('.//li[2]/a/@title').extract_first()

            # extract_first() returns None when nothing matches. Skip such
            # entries so the two lists stay aligned and never contain None
            # (which previously crashed the progress message below).
            if not img_url or not title:
                self.logger.warning(
                    "Skipping entry with missing data: url=%r, title=%r",
                    img_url,
                    title,
                )
                continue

            item['url'].append(img_url)
            item['name'].append(title)
            # NOTE: no time.sleep() here -- sleeping in a callback blocks
            # Scrapy's event loop and does not throttle any download. Use the
            # DOWNLOAD_DELAY setting if request throttling is required.
            print(f"爬取完成第{len(item['url'])}张---->{title}.jpg")

        print(f"一共{len(item['url'])}张图片")
        yield item